This dataset contains two experimental tasks that used individualized rewards and their subjective values to study (1) passive avoidance and reversal learning (PasRev dataset) and (2) risky decision making (WoF dataset) in offenders with and without psychopathy, as well as in healthy controls. The results showed that a sufficient subjective value of reward facilitates acquisition, but not response reversal, in psychopathic offenders.
Psychopathic offenders were, however, found to be guided more by pay-off sizes than by probabilities. No effects of subjective reward value on overall risky decision making were found. These findings suggest that the specific reinforcement-learning impairments often found in individuals with psychopathy can be reduced by using personalized rewards.
#pip install keras
#pip install tensorflow==2.2.0
import sys
import pandas as pd
import numpy as np
import sklearn
import matplotlib
import keras
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
from pandas.plotting import scatter_matrix
# Report the interpreter and library versions this notebook was run with,
# so results can be reproduced against the same environment.
print(f'Python: {sys.version}')
print(f'Pandas: {pd.__version__}')
print(f'Numpy: {np.__version__}')
print(f'Sklearn: {sklearn.__version__}')
print(f'Matplotlib: {matplotlib.__version__}')
print(f'Keras: {keras.__version__}')
print(f'Tensorflow: {tf.__version__}')
import io
import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3
def __iter__(self): return 0
# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
# SECURITY NOTE(review): a live API key is hard-coded below — rotate it and
# load credentials from the environment before sharing this notebook.
# NOTE(review): indentation appears to have been stripped when this notebook
# was exported; the if/else bodies and the call-continuation lines below must
# be re-indented before this runs as a plain .py script.
if os.environ.get('RUNTIME_ENV_LOCATION_TYPE') == 'external':
endpoint_91eebe99af9149ecabdf33325a3f33dd = 'https://s3.us.cloud-object-storage.appdomain.cloud'
else:
endpoint_91eebe99af9149ecabdf33325a3f33dd = 'https://s3.us.cloud-object-storage.appdomain.cloud'
# NOTE(review): both branches assign the same endpoint URL, so the if/else
# is redundant as written.
client_91eebe99af9149ecabdf33325a3f33dd = ibm_boto3.client(service_name='s3',
ibm_api_key_id='7VtkP0vgO6_OZ9qCyMnBVfAgZTB4e5mU57YMIVL1NnQG',
ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
config=Config(signature_version='oauth'),
endpoint_url=endpoint_91eebe99af9149ecabdf33325a3f33dd)
# Download the WoF (risky decision making task) spreadsheet as a byte stream.
body = client_91eebe99af9149ecabdf33325a3f33dd.get_object(Bucket='assignmentmachinelearning-donotdelete-pr-vk52ui2felqsnh',
Key='Behavioural data WoF_total.xlsx')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )
# Read the whole stream into memory and parse the first sheet into a DataFrame.
wof = pd.read_excel(io.BytesIO(body.read()))
# wof_1= pd.read_excel(io.BytesIO(body.read()),'Explanation of variables')
wof.head(5)
# Download and parse the PasRev (passive avoidance / reversal learning)
# spreadsheet from the same bucket.
body = client_91eebe99af9149ecabdf33325a3f33dd.get_object(Bucket='assignmentmachinelearning-donotdelete-pr-vk52ui2felqsnh',Key='Behavioural data PasRev_total.xlsx')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )
pasrev = pd.read_excel(io.BytesIO(body.read()))
# pasrev_1 = pd.read_excel(io.BytesIO(body.read()),'Explanation of variables')
# NOTE(review): pasrev is loaded but never used in the rest of this file —
# confirm whether the PasRev analysis lives elsewhere.
pasrev.head(5)
# Inspect the WoF table, then drop identifiers and impute missing values.
wof.shape
wof.info() #datatype information
# Work on `pdata`, dropping the participant ID (PPN) and Reward_value_NR.
# NOTE(review): presumably Reward_value_NR is constant/uninformative —
# confirm against the 'Explanation of variables' sheet.
pdat=wof
pdata=pdat.drop(columns=['PPN','Reward_value_NR'])
# all null values
pdata.isnull().sum()
# Replace remaining NaNs with 0.
# NOTE(review): zero-imputation may bias score/IQ columns; consider
# mean/median imputation instead.
pdata=pdata.fillna(0)
# null value
pdata.isnull().sum()
No null values are present now.
# One box plot per numeric variable of interest.
# NOTE(review): this boxplots value_counts() — i.e. the frequency of each
# distinct value — rather than the raw column values; confirm intended.
var1 = pdata[["PCL_R_Total", "Age", "IQ", "Session_NR", "WoF_ScoreTotal_NR", "Reward_value_LR", "Session_LR",
              "WoF_ScoreTotal_LR", "Reward_value_HR", "Session_HR", "WoF_ScoreTotal_HR"]]
for col1 in var1:
    plt.figure(figsize=(7, 7))
    pdata[col1].value_counts().plot.box()
    plt.xticks(rotation=60)
    plt.show()
# Class-balance bar chart of the target column `Group`, annotating the
# count above each bar.
ax = pdata["Group"].value_counts().plot.bar(color=('y', 'c'), figsize=(8, 8))
plt.xlabel("Group", labelpad=15)
plt.ylabel("Count", labelpad=15)
plt.title("Target Class", y=1.02)
for p in ax.patches:
    bar_top = (p.get_x() + p.get_width() / 2., p.get_height())
    ax.annotate(np.round(p.get_height(), decimals=2), bar_top,
                ha='center', va='center',
                xytext=(0, 10), textcoords='offset points')
from pandas.plotting import radviz
# Feature subset (plus the Group target) used by the RadViz and
# parallel-coordinates visualisations below.
var2= pdata[["Group","PCL_R_Total","Age","IQ","Session_NR","WoF_ScoreTotal_NR","Reward_value_LR","Session_LR",
"WoF_ScoreTotal_LR","Reward_value_HR","Session_HR","WoF_ScoreTotal_HR"]]
# Split rows into the three participant groups by row position.
# NOTE(review): iloc stops are exclusive, so rows 17 and 34 belong to no
# group slice — verify the group boundaries against the actual data.
g1=var2.iloc[:17,:]
g2= var2.iloc[18:34,:]
g3= var2.iloc[35:55,:]
def rad_viz(df, Group):
    """Render a RadViz projection of ``df`` coloured by the ``Group`` column."""
    plt.figure(figsize=(10, 10))
    radviz(df, Group, color=sns.color_palette())
    plt.show()
# RadViz projection for each participant group separately, then combined.
for caption, frame in (
    ("Healthy Controls :", g1),
    ("Non - Psychopathic Offenders :", g2),
    ("Psychopathic Offenders :", g3),
    ("All Group Combined :", var2),
):
    print(caption)
    rad_viz(frame, "Group")
### Comment Due
from pandas.plotting import parallel_coordinates
def pcoord_viz(df, labels):
    """Render a parallel-coordinates plot of ``df`` coloured by the ``labels`` column."""
    plt.figure(figsize=(30, 10))
    parallel_coordinates(df, labels, color=sns.color_palette())
    plt.show()
# Parallel-coordinates plot for each participant group, then all combined.
for caption, frame in (
    ("Healthy Controls :", g1),
    ("Non - Psychopathic Offenders :", g2),
    ("Psychopathic Offenders :", g3),
    ("All Group Combined :", var2),
):
    print(caption)
    pcoord_viz(frame, "Group")
### Comment Due
# seaborn's kdeplot plots univariate or bivariate density estimates.
# Size can be changed by tweaking the `height` value.
# Per-group density of IQ, total PCL-R score, and the high-reward WoF score.
a1=sns.FacetGrid(pdata, hue="Group", height=5).map(sns.kdeplot, "IQ").add_legend()
a2=sns.FacetGrid(pdata, hue="Group", height=5).map(sns.kdeplot, "PCL_R_Total").add_legend()
a3=sns.FacetGrid(pdata, hue="Group", height=5).map(sns.kdeplot, "WoF_ScoreTotal_HR").add_legend()
plt.show()
### Comment due
# 3D scatter plot (projection="3d" below; the original comment said 2D).
# Education level vs IQ vs high-reward WoF score, coloured by group.
from mpl_toolkits import mplot3d
x= pdata["Educ_Level"]
y=pdata["IQ"]
z=pdata["WoF_ScoreTotal_HR"] ## Total score in the High reward condition
g= pdata["Group"]  # colour channel for the scatter points
fig = plt.figure(figsize = (16, 9))
ax = plt.axes(projection ="3d")
# NOTE(review): grid's `b` keyword is deprecated (removed in recent
# matplotlib) — the replacement is `visible=True`.
ax.grid(b = True, color ='grey',
linestyle ='-', linewidth = 1,
alpha = 0.2)
# NOTE(review): plt.cm.get_cmap is deprecated since matplotlib 3.7;
# matplotlib.colormaps["brg"].resampled(5) is the modern equivalent.
sp1=ax.scatter3D(x,y,z, s=200, c=g, cmap=plt.cm.get_cmap("brg",5))
cbar=fig.colorbar(sp1, orientation="vertical", extend="both", shrink = 0.6, aspect = 8)
cbar.set_label(label="Group", size=15)
plt.show()
### Comment Due
# plotting all the categorical variables: one annotated bar chart per column.
cat_var = ['Educ_Level',"Order_Rewards","Order_Tasks","Session_NR","Session_LR","Session_HR"]
# NOTE(review): the loop-body indentation was lost in the notebook export;
# re-indent these lines before running this as a plain .py script.
for col in cat_var:
plt.figure(figsize=(7,7))
ax=pdata[col].value_counts().plot.bar()
plt.title(col,fontsize=18)
plt.tight_layout()
for p in ax.patches:
ax.annotate(np.round(p.get_height(),decimals=2),
(p.get_x()+p.get_width()/2, p.get_height()),ha='center',va='center',
xytext=(0, 10),textcoords='offset points')
### Comment Due
# For each session column (high / low / no reward condition), print the
# Group-by-session contingency table and plot it as a row-normalised
# stacked bar chart. (The previous per-section comments — "subscription
# status in different domain in jobs" — were copy-pasted from another
# project and did not describe this data.)
for session_col in ("Session_HR", "Session_LR", "Session_NR"):
    print(pd.crosstab(pdata['Group'], pdata[session_col]))
    Group = pd.crosstab(pdata['Group'], pdata[session_col])
    Group.div(Group.sum(1).astype(float), axis=0).plot(kind="bar", stacked=True, figsize=(8,8))
    plt.xlabel('Group')
    plt.ylabel(session_col)
### Comment Due
# Scatter of Age vs total PCL-R score, coloured by group.
sns.set_style('whitegrid');
sns.FacetGrid(pdata, hue='Group', height=8) \
.map(plt.scatter, "Age", "PCL_R_Total") \
.add_legend();
plt.show()
## comment due
# Age vs Group: shaded age density per group, wide aspect ratio.
# NOTE(review): kdeplot's `shade=` is deprecated in newer seaborn — the
# replacement keyword is `fill=True`.
plot = sns.FacetGrid(pdata, hue='Group',aspect=5)
plot.map(sns.kdeplot,'Age', shade= True)
plot.add_legend()
### comment due
#pip install yellowbrick
# Pairwise feature-correlation ranking heatmap via yellowbrick's Rank2D.
from yellowbrick.features import Rank2D
fig, ax2 = plt.subplots( figsize=(12, 12))
vzr = Rank2D(ax=ax2)
vzr.fit(var2)
vzr.transform(var2)
sns.despine(left=True, bottom=True)
# NOTE(review): poof() is deprecated in newer yellowbrick — use show().
vzr.poof()
pdata.describe() #calculate some statistical data like percentile, mean and std of the numerical values for each features
from sklearn.preprocessing import StandardScaler
# NOTE(review): the names here are swapped relative to convention — `x`
# holds the target vector and `y` the feature matrix; the rest of the
# notebook depends on this naming, so it is kept.
x = pdata['Group'] #target class
y = pdata.drop('Group', axis = 1) #all features
#scaling features with standardscaler
# Standardise every feature column to zero mean / unit variance.
s=StandardScaler()
s.fit(y)
sdata=s.transform(y)
sdata
pdata.shape
#dimension reduction with principal component analysis
from sklearn.decomposition import PCA
p = PCA(n_components=20) # dimension reduce to 20 from 61
# NOTE(review): "from 61" — confirm the feature matrix really has 61
# columns; n_components must not exceed min(n_samples, n_features).
p.fit(sdata)
pca=p.transform(sdata)
# Report the shape before/after projection and the projected matrix itself.
print ("Before PCA \n",sdata.shape)
print("\nAfter PCA\n",pca.shape)
print("")
print (pca)
#2D PCA Scatter Plot: principal component 5 (x) vs component 2 (y),
# coloured by group.
import plotly.express as px
from sklearn.decomposition import PCA
colormap = np.array(['r', 'g', 'b'])
# NOTE(review): colormap[pdata['Group']] assumes Group is integer-coded
# 0..2; a value of 3 would raise IndexError — verify the coding.
fig = px.scatter(pca, x=4, y=1,color=colormap[pdata['Group']])
fig.show()
##Bad
#All PCA Components
# Label each axis with the component number and its explained-variance %.
labels = {
str(i): f"PC {i+1} ({var:.1f}%)"
for i, var in enumerate(p.explained_variance_ratio_ * 100)
}
# Scatter matrix of the first four principal components, coloured by group.
fig = px.scatter_matrix(
pca,
labels=labels,
dimensions=range(4),
color=pdata["Group"]
)
fig.update_traces(diagonal_visible=False)
fig.show()
# Hold out 25% of the PCA-transformed samples for testing. `x` holds the
# Group labels (target) from the scaling cell above; fixed seed for
# reproducibility.
from sklearn import model_selection
x_train, x_test, y_train, y_test = model_selection.train_test_split(pca,x, test_size = 0.25,random_state= 1)
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)
# Baseline model: linear-kernel support vector classifier.
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 1)
classifier.fit(x_train, y_train)
classifier.score(x_test,y_test)
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,classification_report
sv=classifier.predict(x_test)
print("Prediction accuracy results :")
print(accuracy_score(y_test,sv))
print(classification_report(y_test,sv))
import seaborn as sns
#model prediction result visualization with confusion matrix
cm=confusion_matrix(y_test,sv)
plt.figure(figsize=(5,5))
# NOTE(review): vmin=-1/vmax=1 saturate a count matrix (counts exceed 1);
# consider dropping the fixed colour limits.
sns.heatmap(cm,annot=True,linewidths=4,vmin=-1, vmax=1, center=0,
cmap="coolwarm",square=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')
#Random forest classifier: 50 trees on the same PCA features, fixed seed
# for reproducibility.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
rfc=RandomForestClassifier(n_estimators=50, random_state = 1)
rfc.fit(x_train,y_train)
rfc.score(x_test,y_test)
rf=rfc.predict(x_test)
print("Prediction accuracy results :")
print(accuracy_score(y_test,rf))
print(classification_report(y_test,rf))
#model prediction result visualization with confusion matrix
cm_1=confusion_matrix(y_test,rf)
plt.figure(figsize=(5,5))
# BUG FIX: this previously plotted `cm` (the SVM's confusion matrix from the
# cell above) instead of the random forest's `cm_1`.
sns.heatmap(cm_1,annot=True,linewidths=4,vmin=-1, vmax=1, center=0,
cmap="coolwarm",square=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')
# k-nearest-neighbours classifier with the default k=5.
from sklearn.neighbors import KNeighborsClassifier
knn_c=KNeighborsClassifier()
knn_c.fit(x_train,y_train)
knn_c.score(x_test,y_test)
kn=knn_c.predict(x_test)
print("Prediction accuracy results :")
# NOTE(review): unlike the SVM/RF sections this prints accuracy * 100
# (a percentage) — inconsistent with the other model reports.
print(accuracy_score(y_test,kn)*100)
print(classification_report(y_test,kn))
# Decision tree classifier on the same PCA features.
from sklearn.tree import DecisionTreeClassifier
# defining the decision tree model with depth of 4, you can tune it further to improve the accuracy score
clf = DecisionTreeClassifier(max_depth=4, random_state=0)
# fitting the decision tree model
clf.fit(x_train,y_train)
# making prediction on the validation set
predict = clf.predict(x_test)
# NOTE(review): classification_report is imported twice on this line.
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,classification_report
print("Prediction accuracy results :")
# calculating the accuracy score
print(accuracy_score(y_test, predict))
print(classification_report(y_test,predict))
# Logistic regression on the same PCA features.
from sklearn.linear_model import LogisticRegression
# defining the logistic regression model
# NOTE(review): default solver/max_iter may emit a ConvergenceWarning on
# this data — consider max_iter=1000 if that occurs.
lreg = LogisticRegression()
# fitting the model on X_train and y_train
lreg.fit(x_train,y_train)
# making prediction on the validation set
prediction = lreg.predict(x_test)
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,classification_report
print("Prediction accuracy results :")
# calculating the accuracy score
print(accuracy_score(y_test, prediction))
print(classification_report(y_test,prediction))
#categorical conversion
# One-hot encode the labels for the 3-class softmax network below.
# NOTE(review): to_categorical(y, 3) requires labels coded 0..2; if Group
# is coded 1..3 this will fail or mislabel — verify the coding.
y_train = tf.keras.utils.to_categorical(y_train, 3)
y_test = tf.keras.utils.to_categorical(y_test, 3)
print(y_test.shape)
print(y_train.shape)
#Build and train the deep learning model
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.optimizers import Adam
# define a function to build the keras model
def create_model():
    """Build and compile a three-hidden-layer softmax classifier over the PCA features."""
    model = Sequential([
        Dense(12, input_dim=x_train.shape[1], kernel_initializer='normal', activation='relu'),
        Dense(25, kernel_initializer='normal', activation='relu'),
        Dense(8, kernel_initializer='normal', activation='relu'),
        # Output layer: one probability per class.
        Dense(3, activation='softmax'),
    ])
    model.compile(optimizer ='adam',loss='categorical_crossentropy',metrics=['accuracy'])
    return model

model = create_model()
print(model.summary())
# Train for 200 epochs on mini-batches of 30 samples.
model.fit(x_train, y_train, epochs=200, batch_size=30, verbose = 1)
# generate classification report using predictions for categorical model
from sklearn.metrics import classification_report, accuracy_score
# BUG FIX: np.round on the softmax output can yield rows that are all zeros
# (or contain several ones), which corrupts accuracy_score and
# classification_report. Take the argmax of both the predictions and the
# one-hot targets so class indices are compared directly.
kdl = np.argmax(model.predict(x_test), axis=1)
true_labels = np.argmax(y_test, axis=1)
print("Prediction accuracy results :")
print(accuracy_score(true_labels, kdl)*100)
print(classification_report(true_labels, kdl))